Skip to content

Disallow extra fields other than "@context"#266

Draft
candleindark wants to merge 8 commits into dandi:master from
candleindark:allow-context-in-data
Draft

Disallow extra fields other than "@context"#266
candleindark wants to merge 8 commits into dandi:master from
candleindark:allow-context-in-data

Conversation

@candleindark
Copy link
Copy Markdown
Member

@candleindark candleindark commented Nov 18, 2024

This PR closes #75. It addresses #75 in the following manner.

  1. Make the models disallow data instances that have extra fields other than the "@context" field, at both the Pydantic level and the JSON Schema level.
  2. The Pydantic models continue to not have a context field.
  3. Data instances with an "@context" field can still be validated against a Pydantic model; the "@context" field is dropped before validation and is therefore ignored.
The solution implemented in this PR is based on the following script.
from typing import Any
import json

from pydantic import BaseModel, ConfigDict, model_validator
from pydantic.json_schema import JsonDict, JsonValue
from jsonschema import validate, Draft202012Validator
import jsonschema


def get_dict_without_context(d: Any) -> Any:
    """
    Strip the `@context` entry from a dictionary, passing other inputs through.

    :param d: The object to process
    :return: A shallow copy of `d` lacking the `@context` key when `d` is a
             dictionary; `d` itself, unchanged, for any other kind of object.
    """
    # Anything that is not a dictionary is handed back untouched
    if not isinstance(d, dict):
        return d

    # Work on a copy so the caller's dictionary is never mutated
    stripped = dict(d)
    stripped.pop("@context", None)
    return stripped


def add_context(json_schema: JsonDict) -> None:
    """
    Add an optional `@context` property to the given JSON schema, in place

    The property is declared as a non-empty string in URI format. It is not
    added to the schema's `required` list, so instances without an `@context`
    key remain valid against the schema. The `required` entry of the schema is
    never created or modified by this function.

    :param json_schema: The dictionary representing the JSON schema
    :raises ValueError: If the `@context` key is already present in the
        `properties` or `required` entry of the given schema. The schema is
        left unmodified in this case.
    """
    context_key = "@context"
    context_key_title = "@Context"

    # Detect a conflict before touching the schema so that a failure leaves the
    # schema exactly as it was given
    already_a_property = context_key in json_schema.get("properties", {})
    already_required = context_key in json_schema.get("required", [])
    if already_a_property or already_required:
        msg = f"The '{context_key}' key is already present in the given JSON schema."
        raise ValueError(msg)

    # `setdefault` creates the `properties` entry when the schema lacks one, so
    # no separate write-back step is needed
    properties: JsonDict = json_schema.setdefault("properties", {})
    properties[context_key] = {
        "format": "uri",
        "minLength": 1,
        "title": context_key_title,
        "type": "string",
    }

    # `@context` is intentionally not appended to `required`; leaving that entry
    # alone also avoids adding an empty `required` list to schemas without one


class Foo(BaseModel):
    # Reject unknown fields, and let `add_context` post-process the generated
    # JSON schema
    model_config = ConfigDict(extra="forbid", json_schema_extra=add_context)

    x: int

    @model_validator(mode="before")
    @classmethod
    def _remove_context_key(cls, data: Any) -> Any:
        """
        Drop the `"@context"` key from a data instance before the "base"
        validation is performed.
        """
        return get_dict_without_context(data)


# Dump the JSON schema generated for `Foo`. The schema forbids additional
# properties and declares `@context` as a property, but only `x` is required.
json_schema_ = Foo.model_json_schema()
print(json.dumps(json_schema_, indent=2))
"""
{
  "additionalProperties": false,
  "properties": {
    "x": {
      "title": "X",
      "type": "integer"
    },
    "@context": {
      "format": "uri",
      "minLength": 1,
      "title": "@Context",
      "type": "string"
    }
  },
  "required": [
    "x"
  ],
  "title": "Foo",
  "type": "object"
}
"""

# JSON documents exercised against the Pydantic model
instance_json_str = '{"x": 1}'
instance_json_str_with_context = '{"@context": "not a valid URI", "x": 1}'
instance_json_str_with_extra = '{"x": 1, "e": 42}'

# A document holding only the declared field validates as usual
vv = Foo.model_validate_json(instance_json_str)
print("\n====================================", f"vv: {vv!r}", sep="\n")
"vv: Foo(x=1)"

# At the Pydantic level the context field is dropped before validation, so even
# a value that is not a URI is accepted and does not appear on the model
vv_with_context = Foo.model_validate_json(instance_json_str_with_context)
print(
    "\n====================================",
    f"vv_with_context: {vv_with_context!r}",
    sep="\n",
)
"vv_with_context: Foo(x=1)"

# Any other extra field is rejected at the Pydantic level
try:
    Foo.model_validate_json(instance_json_str_with_extra)
except ValueError as e:
    print("\n====================================", e, sep="\n")
    """
    1 validation error for Foo
    e
      Extra inputs are not permitted [type=extra_forbidden, input_value=42, input_type=int]
        For further information visit https://errors.pydantic.dev/2.9/v/extra_forbidden
    """

# Data instances exercised against the generated JSON schema
instance = {"@context": "https://schema.org", "x": 1}
instance_with_invalid_context = {"@context": "invalid context", "x": 1}
instance_missing_context = {"x": 1}
instance_with_extra = {"@context": "https://schema.org", "x": 1, "e": 42}

# Validate an instance with valid context and x field
validate(instance, json_schema_, format_checker=Draft202012Validator.FORMAT_CHECKER)

# Instance with invalid context fails validation
# NOTE: jsonschema only enforces the `uri` format when an optional URI
# validation package (`rfc3987` or `rfc3986-validator`) is installed; without
# one, the format is not checked and no error is raised here.
try:
    validate(
        instance_with_invalid_context,
        json_schema_,
        format_checker=Draft202012Validator.FORMAT_CHECKER,
    )
except jsonschema.exceptions.ValidationError as e:
    print("\n====================================")
    print(e)
    """
    'invalid context' is not a 'uri'
    Failed validating 'format' in schema['properties']['@context']:
        {'format': 'uri', 'minLength': 1, 'title': '@Context', 'type': 'string'}
    On instance['@context']:
        'invalid context'
    """

# The context field is optional
validate(
    instance_missing_context,
    json_schema_,
    format_checker=Draft202012Validator.FORMAT_CHECKER,
)
print("\n====================================")
print("Instance without the `@context` key is valid")
"Instance without the `@context` key is valid"


# Instance with extra field fails validation
try:
    validate(
        instance_with_extra,
        json_schema_,
        format_checker=Draft202012Validator.FORMAT_CHECKER,
    )
except jsonschema.exceptions.ValidationError as e:
    print("\n====================================")
    print(e)
    """
    Additional properties are not allowed ('e' was unexpected)
    Failed validating 'additionalProperties' in schema:
        {'additionalProperties': False,
         'properties': {'x': {'title': 'X', 'type': 'integer'},
                        '@context': {'format': 'uri',
                                     'minLength': 1,
                                     'title': '@Context',
                                     'type': 'string'}},
         'required': ['x'],
         'title': 'Foo',
         'type': 'object'}
    On instance:
        {'@context': 'https://schema.org', 'x': 1, 'e': 42}
    """

TODOs

Loading
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

add "unevaluatedProperties": false, (or "additionalProperties": false,) to jsonschema dump

3 participants